#'
#'  Project title:     'Sovereign Risk and Government Change: Elections, Ideology and Experience'
#'  Authors:           Sarah M. Brooks; Raphael Cunha; Layna Mosley
#'  File description:  Data preparation for heteroskedastic regressions of monthly EMBI/CDS spreads
#'  Output:            'EMBIG and CDS Dataset.RData'
#'

# my_packages <- c("plyr", "tidyverse", "haven",
#                  "reshape2", "countrycode", "tidyquant",
#                  "Quandl", "WDI", "igraph", "rio")
# 
# install.packages(my_packages)

library(foreign)
library(rio)
library(haven)
library(plyr)
library(tidyverse)
library(reshape2)
library(countrycode)
library(tidyquant)
library(Quandl)
library(WDI)
library(igraph)

# Set directories

DATADIR <- "~/.../Data/Monthly"

# Function for percentage changes/returns

Ret <- function(x){ #Daily return function
  
  c(NA, x[2:length(x)]/x[1:length(x)-1] - 1)
  
}


# Load and format EMBI and CDS spreads ------------------------------------

spread.dat <- read.csv(file = file.path(DATADIR, "Monthly EMBIG and CDS Spreads Updated 2018-05-09.csv"),
                      stringsAsFactors = FALSE)

spread.dat <- spread.dat[order(spread.dat$country, spread.dat$year, spread.dat$month), ]

spread.dat$ccodecow <- countrycode(spread.dat$country, origin = "country.name", destination = "cown")
spread.dat$year <- as.character(spread.dat$year)
spread.dat$month <- format(as.yearmon(spread.dat$month_year, "%m-%Y"), "%m")


# Load global economic variables ------------------------------------------

# US interest rates
# 10-year Treasury constant maturity rate

getSymbols("GS10", src = "FRED")
treasury10y <- as.data.frame(GS10)
treasury10y$date <- as.Date(row.names(treasury10y))
treasury10y$month_year <- format(treasury10y$date, "%m-%Y")
treasury10y$treasury10y <- treasury10y$GS10
treasury10y$treasury10y <- na.locf(treasury10y$treasury10y)
treasury10y$d_treasury10y <- c(NA, diff(treasury10y$treasury10y))

spread.dat <- left_join(spread.dat, treasury10y[c("month_year", "treasury10y", "d_treasury10y")],
                        by = "month_year")
rm(GS10, treasury10y)

# Equity premium: S&P 500 P/E Ratio

eqprem <- Quandl("MULTPL/SP500_PE_RATIO_MONTH", api_key = "yQutv1RcK8_Choy78PoN")
eqprem$month_year <- format(eqprem$Date, "%m-%Y")
eqprem$eqprem <- eqprem$Value
eqprem$d_eqprem <- c(NA, diff(eqprem$eqprem))

spread.dat <- left_join(spread.dat, eqprem[c("month_year", "eqprem", "d_eqprem")],
                        by = "month_year", type = "left")
rm(eqprem)

# Commodity prices

commod <- Quandl(c("COM/WLD_IENERGY", "COM/WLD_INONFUEL"), api_key = "yQutv1RcK8_Choy78PoN")
colnames(commod) <- c("date", "energy_index", "nonfuel_index")
commod$month_year <- format(commod$date, "%m-%Y")
spread.dat <- left_join(spread.dat, commod[c("month_year", "energy_index", "nonfuel_index")],
                        by = "month_year")
rm(commod)

# Global default rate

globaldefault <- read.csv(file = file.path(DATADIR, "Database of Sovereign Defaults (Bank of Canada) 2017-06-30.csv"),
                          stringsAsFactors = FALSE)
globaldefault$year <- as.character(globaldefault$year)
spread.dat <- left_join(spread.dat, globaldefault[c("year", "globaldefault")],
                        by = "year")
rm(globaldefault)

# VIX

vix <- tq_get("^VIX",
              get = "stock.prices",
              from = "1990-01-01") %>%
  tq_transmute(select = close,
                mutate_fun = to.period,
               period = "months") %>%
  mutate(vix = close,
         d_vix = c(NA, diff(vix)),
         month_year = format(date, "%m-%Y")) %>%
  as.data.frame()

spread.dat <- left_join(spread.dat, vix[c("month_year", "vix", "d_vix")],
                        by = "month_year")
rm(vix)


# Country-level variables -------------------------------------------------

# Exchange rates (BIS)

fxrate <- read.csv(file = file.path(DATADIR, "Exchange Rates Monthly (BIS).csv"),
                   stringsAsFactors = FALSE)

fxrate <- subset(fxrate, Frequency == "Monthly" & COLLECTION == "E",
                 select = -c(FREQ, Frequency, REF_AREA, CURRENCY, Currency, Collection, COLLECTION, Time_Period))

fxrate <- melt(fxrate, id.vars = "Reference_area", value.name = "fxrate", variable.name = "year_month")

fxrate$country <- fxrate$Reference_area
fxrate$year_month <- gsub("X", "", fxrate$year_month)
fxrate$year_month <- gsub("\\.", "-", fxrate$year_month)
fxrate$date <- as.Date(as.yearmon(fxrate$year_month, "%Y-%m"))
fxrate$month_year <- format(fxrate$date, "%m-%Y")
fxrate$iso3c <- countrycode(fxrate$country, origin = "country.name", destination = "iso3c")

fxrate <- fxrate[order(fxrate$country, fxrate$date), ]

fxrate <- fxrate %>%
  group_by(iso3c) %>%
  mutate(d_fxrate = Ret(fxrate)*100) %>%
  as.data.frame()

spread.dat <- left_join(spread.dat, fxrate[c("iso3c", "month_year", "fxrate", "d_fxrate")],
                        by = c("iso3c", "month_year"))
rm(fxrate)

# Quarterly GDP growth (IFS/IMF)

growth_qtr <- read.csv(file = file.path(DATADIR, "Real GDP Growth Quarterly (IMF-IFS).csv"),
                       stringsAsFactors = FALSE)
growth_qtr <- subset(growth_qtr, select = -series)
growth_qtr <- melt(growth_qtr, id.vars = "country", variable.name = "date", value.name = "growth_qtr", factorsAsStrings = FALSE)
growth_qtr$date <- as.Date(as.yearqtr(growth_qtr$date, format = "Q%q.%Y"), frac = 1) # frac=1 converts to end-of-quarter format instead of beginning of quarter
growth_qtr <- growth_qtr[order(growth_qtr$country, growth_qtr$date), ]
growth_qtr$month_year <- format(growth_qtr$date, "%m-%Y")
growth_qtr$iso3c <- countrycode(growth_qtr$country, origin = "country.name", destination = "iso3c")

spread.dat <- left_join(spread.dat, growth_qtr[c("iso3c", "month_year", "growth_qtr")],
                        by = c("iso3c", "month_year"))

spread.dat <- spread.dat %>%
  group_by(iso3c) %>%
  mutate(growth_qtr = na.approx(growth_qtr, maxgap = 2, na.rm = FALSE)) %>% # Linear interpolation for monthly data
  as.data.frame()

rm(growth_qtr)

# GDP (quarterly, constant US$ million, World Bank GEM)

gdp_qtr <- read.csv(file = file.path(DATADIR, "GDP Constant US Million Dollars Quarterly (World Bank - GEM).csv"),
                    stringsAsFactors = FALSE)
gdp_qtr <- subset(gdp_qtr, select = -c(series, series_code))
gdp_qtr <- melt(gdp_qtr, id.vars = c("country", "wbcode"),
                variable.name = "date",
                value.name = "gdp_qtr",
                factorsAsStrings = FALSE)
gdp_qtr$date <- as.Date(as.yearqtr(gdp_qtr$date, format="X%YQ%q"), frac = 1) # frac=1 converts to end-of-quarter format instead of beginning of quarter
gdp_qtr <- gdp_qtr[order(gdp_qtr$country, gdp_qtr$date), ]
gdp_qtr$month_year <- format(gdp_qtr$date, "%m-%Y")
gdp_qtr$iso3c <- countrycode(gdp_qtr$wbcode, origin = "wb", destination = "iso3c")
gdp_qtr <- subset(gdp_qtr, !is.na(iso3c))

spread.dat <- left_join(spread.dat, gdp_qtr[, c("iso3c", "month_year", "gdp_qtr")],
                        by = c("iso3c", "month_year"))

spread.dat <- spread.dat %>%
  group_by(iso3c) %>%
  mutate(gdp_qtr = na.approx(gdp_qtr, maxgap = 2, na.rm = FALSE)) %>% # Linear interpolation for monthly data
  as.data.frame()

rm(gdp_qtr)

# Current account balance (quarterly, US$ million, IFS/IMF)

cur_acct <- read.csv(file = file.path(DATADIR, "Current Account Balance Quarterly (IMF-IFS).csv"),
                     stringsAsFactors = FALSE)
cur_acct <- subset(cur_acct, select = -series)
cur_acct <- melt(cur_acct,
                 id.vars = "country",
                 variable.name = "date",
                 value.name = "cur_acct_qtr",
                 factorsAsStrings = FALSE)
cur_acct$date <- as.Date(as.yearqtr(cur_acct$date, format = "Q%q.%Y"), frac = 1) # frac=1 converts to end-of-quarter format instead of beginning of quarter
cur_acct <- cur_acct[order(cur_acct$country, cur_acct$date), ]
cur_acct$month_year <- format(cur_acct$date, "%m-%Y")
cur_acct$iso3c <- countrycode(cur_acct$country, origin = "country.name", destination = "iso3c")

spread.dat <- left_join(spread.dat, cur_acct[c("iso3c", "month_year", "cur_acct_qtr")],
                        by = c("iso3c", "month_year"))

spread.dat <- spread.dat %>%
  group_by(iso3c) %>%
  mutate(cur_acct_qtr = na.approx(cur_acct_qtr, maxgap = 2, na.rm = FALSE)) %>% # Linear interpolation for monthly data
  as.data.frame()

spread.dat <- mutate(spread.dat, cab_qtr_gdp = (cur_acct_qtr/gdp_qtr)*100)

rm(cur_acct)

# International reserves (monthly, US$ million, IMF/IFS)

reserves <- read.csv(file = file.path(DATADIR, "International Reserves Monthly (IMF-IFS).csv"),
                     stringsAsFactors = FALSE)
reserves <- melt(reserves,
                 id.vars = "country",
                 variable.name = "date",
                 value.name = "reserves",
                 factorsAsStrings = FALSE)
reserves$date <- as.Date(reserves$date, "X%m.%d.%y")
reserves <- reserves[order(reserves$country, reserves$date), ]
reserves$month_year <- format(reserves$date, "%m-%Y")
reserves$iso3c <- countrycode(reserves$country, origin = "country.name", destination = "iso3c")

spread.dat <- left_join(spread.dat, reserves[c("iso3c", "month_year", "reserves")],
                        by = c("iso3c", "month_year"))

spread.dat <- mutate(spread.dat, reserves_gdp_mon = (reserves/gdp_qtr)*100)

rm(reserves)

# Inflation (quarterly, IMF-IFS)

inflation <- read.csv(file = file.path(DATADIR, "Consumer Price Index Quarterly (IFS-IMF).csv"),
                      stringsAsFactors = FALSE)
inflation <- subset(inflation, select = -series)
inflation <- melt(inflation,
                  id.vars = "country",
                  variable.name = "date",
                  value.name = "inflation",
                  factorsAsStrings = FALSE)
inflation$date <- as.Date(as.yearqtr(inflation$date, format = "Q%q.%Y"), frac = 1) # frac=1 converts to end-of-quarter format instead of beginning of quarter
inflation <- inflation[order(inflation$country, inflation$date), ]
inflation$month_year <- format(inflation$date, "%m-%Y")
inflation$iso3c <- countrycode(inflation$country, origin = "country.name", destination = "iso3c")

spread.dat <- left_join(spread.dat, inflation[c("iso3c", "month_year", "inflation")],
                        by = c("iso3c", "month_year"))

spread.dat <- spread.dat %>%
  group_by(iso3c) %>%
  mutate(inflation = na.approx(inflation, maxgap = 2, na.rm = FALSE), # Linear interpolation for monthly data
         d_inflation = c(NA, diff(inflation))) %>% 
  as.data.frame()

rm(inflation)

# Capital account openness (Chinn-Ito index)

kaopen <- import(file = file.path(DATADIR, "KAOPEN 2015.dta"), format = "dta")
kaopen$iso3c <- kaopen$ccode
kaopen$year <- as.character(kaopen$year)

spread.dat <- left_join(spread.dat, kaopen[c("iso3c", "year", "kaopen")],
                        by = c("iso3c", "year"))
rm(kaopen)

# Karcher-Steinberg Kaopen index (modified Chinn-Ito)

kaopen_ks <- read.csv(file = file.path(DATADIR, "Karcher Steinberg Capital Account Openness 2010 update.csv"),
                      stringsAsFactors = FALSE)
kaopen_ks$iso3c <- kaopen_ks$ccode
kaopen_ks$year <- as.character(kaopen_ks$year)

spread.dat <- left_join(spread.dat, kaopen_ks[c("iso3c", "year", "ckaopen2010")],
                        by = c("iso3c", "year"))
rm(kaopen_ks)

# Credit ratings

rating <- import(file = file.path(DATADIR, "Credit Ratings Monthly June 2016.dta"), format = "dta")
rating$month_abb <- month.abb[rating$month]
rating$month_year <- paste(rating$month_abb, rating$year, sep = "-")
rating$date <- as.Date(as.yearmon(rating$month_year, "%b-%Y"))
rating$year <- format(rating$date, "%Y")
rating$month <- format(rating$date, "%m")
rating$iso3c <- countrycode(rating$wbcode, origin = "wb", destination = "iso3c")

rating <- rating %>%
  group_by(iso3c) %>%
  mutate(d_fitchratingnumeric = c(NA, diff(fitchratingnumeric)),
         d_SPratingnumeric = c(NA, diff(SPratingnumeric)),
         d_moodysratingnumeric = c(NA, diff(moodysratingnumeric))) %>%
  as.data.frame()

spread.dat <- left_join(spread.dat, rating[, c("iso3c", "month", "year",
                                               "fitchinvestmentgrade", "moodyinvestmentgrade", "SPinvestmentgrade",
                                               "fitchratingnumeric", "SPratingnumeric", "moodysratingnumeric",
                                               "d_fitchratingnumeric", "d_SPratingnumeric", "d_moodysratingnumeric")],
                        by = c("iso3c", "month", "year"))

rm(rating)

# Worl Development Indicators

WDI <- WDI(country = "all", indicator = c("BN.CAB.XOKA.GD.ZS",
                                          "DT.DOD.DECT.GN.ZS",
                                          "DT.TDS.DECT.EX.ZS",
                                          "DT.DOD.DSTC.IR.ZS",
                                          "NY.GDP.PCAP.CD",
                                          "NY.GDP.MKTP.KD.ZG",
                                          "NY.GDP.MKTP.KD",
                                          "GC.XPN.TOTL.GD.ZS",
                                          "FP.CPI.TOTL.ZG"),
           start = 1990, end = 2018, extra = TRUE)

WDI$year <- as.character(WDI$year)

# Format date for monthly dataset
WDI$month_year <- paste("12-", WDI$year, sep = "")

# Rename variables
colnames(WDI)[colnames(WDI)=="BN.CAB.XOKA.GD.ZS"] <- "cab_annual_gdp"            # Current account balance as %GDP
colnames(WDI)[colnames(WDI)=="DT.DOD.DECT.GN.ZS"] <- "extdebt_gni_annual"        # External debt to GNI ratio
colnames(WDI)[colnames(WDI)=="DT.TDS.DECT.EX.ZS"] <- "debtserv_exports_annual"   # Debt service to exports ratio
colnames(WDI)[colnames(WDI)=="DT.DOD.DSTC.IR.ZS"] <- "shortdebt_reserves_annual" # Short-term debt (% of total reserves)
colnames(WDI)[colnames(WDI)=="NY.GDP.PCAP.CD"] <- "gdppc"                        # GDP per capita
colnames(WDI)[colnames(WDI)=="NY.GDP.MKTP.KD.ZG"] <- "growth_annual"             # GDP growth
colnames(WDI)[colnames(WDI)=="NY.GDP.MKTP.KD"] <- "gdp_annual"                   # GDP
colnames(WDI)[colnames(WDI)=="GC.XPN.TOTL.GD.ZS"] <- "govtcons"                  # Government consumption as %GDP
colnames(WDI)[colnames(WDI)=="FP.CPI.TOTL.ZG"] <- "inflation_annual"             # Inflation (%)

# Merge variables with annual frequency

spread.dat <- left_join(spread.dat, WDI[c("iso3c",
                                          "year",
                                          "cab_annual_gdp",
                                          "gdp_annual",
                                          "growth_annual",
                                          "gdppc",
                                          "govtcons",
                                          "inflation_annual")],
                        by = c("iso3c", "year"))

# Merge variables that will be interpolated for monthly frequency

spread.dat <- left_join(spread.dat, WDI[c("iso3c",
                                          "month_year",
                                          "debtserv_exports_annual",
                                          "extdebt_gni_annual",
                                          "shortdebt_reserves_annual")],
                        by = c("iso3c", "month_year"))

# Linearly interpolate annual debt data to convert to monthly frequency

spread.dat <- spread.dat %>%
  group_by(iso3c) %>%
  mutate(debtserv_exports_annual = na.approx(debtserv_exports_annual, maxgap = 11, na.rm = FALSE),
         extdebt_gni_annual = na.approx(extdebt_gni_annual, maxgap = 11, na.rm = FALSE),
         shortdebt_reserves_annual = na.approx(shortdebt_reserves_annual, maxgap = 11, na.rm = FALSE)) %>%
  as.data.frame()

rm(WDI)

# Debt data from World Bank's Quarterly External Debt Statistics

extdebt_qtr <- read.csv(file = file.path(DATADIR, "World Bank Quarterly External Debt Statistics (Oct 2017).csv"),
                        stringsAsFactors = FALSE)
extdebt_qtr <- subset(extdebt_qtr, select = -c(series, series_code))
extdebt_qtr <- melt(extdebt_qtr, id.vars = c("country", "wbcode"), variable.name = "date", value.name = "extdebt_qtr", factorsAsStrings = FALSE)
extdebt_qtr$date <- as.Date(as.yearqtr(extdebt_qtr$date, format="X%YQ%q"), frac = 1) # frac=1 converts to end-of-quarter format instead of beginning of quarter
extdebt_qtr <- extdebt_qtr[order(extdebt_qtr$country, extdebt_qtr$date), ]
extdebt_qtr$month_year <- format(extdebt_qtr$date, "%m-%Y")
extdebt_qtr$iso3c <- countrycode(extdebt_qtr$wbcode, origin = "wb", destination = "iso3c")

spread.dat <- left_join(spread.dat, extdebt_qtr[c("iso3c", "month_year", "extdebt_qtr")],
                        by = c("iso3c", "month_year"))

spread.dat <- spread.dat %>%
  group_by(iso3c) %>%
  mutate(extdebt_qtr = na.approx(extdebt_qtr, maxgap = 2, na.rm = FALSE)) %>% # Linear interpolation for monthly data
  as.data.frame()

spread.dat <- mutate(spread.dat,
                     extdebt_gdp_qtr = ((extdebt_qtr/1000000)/gdp_qtr)*100)

rm(extdebt_qtr)

# Sovereign debt crises (Laeven & Valencia)

lvcrisis <- read.csv(file = file.path(DATADIR, "Laeven Valencia monthly crisis data (cleaned).csv"))

lvcrisis$iso3c <- countrycode(lvcrisis$wbcode, origin = "wb", destination = "iso3c", warn = TRUE)
lvcrisis$month <- as.character(lvcrisis$month)
lvcrisis$year <- as.character(lvcrisis$year)

spread.dat <- left_join(spread.dat, lvcrisis[c("iso3c", "month", "year",
                                               "sovereigndefault", "sovereigndebtrestructure",
                                               "currencycrisisstart")],
                        by = c("iso3c", "month", "year"))
rm(lvcrisis)

# Polity IV scores

polity <- read.csv(file = file.path(DATADIR, "Polity IV v2016.csv"),
                   stringsAsFactors = FALSE)

polity$iso3c <- countrycode(polity$country, origin = "country.name", destination = "iso3c", warn = TRUE)
polity$year <- as.character(polity$year)

spread.dat <- left_join(spread.dat, polity[c("iso3c", "year", "polity2", "xrcomp", "xropen")],
                        by = c("iso3c", "year"))
rm(polity)


# Spatial lags/diffusion variables ----------------------------------------

categor <- import(file = file.path(DATADIR, "Country categorizations July 2016.dta"), format = "dta")

categor$iso3c <- countrycode(categor$country, origin = "country.name", destination = "iso3c")
categor$year <- as.character(categor$year)

spread.dat <- left_join(spread.dat, categor[c("iso3c", "year", "ftsecategorynumeric", "mscibarracategorynumeric")],
                        by = c("iso3c", "year"))

# Diffusion variable based on FTSE categories for EMBIG and CDS spreads 

cat.var <- "ftsecategorynumeric" # Set category variable to be used
diff.list <- list() # List to store data frames with spatial lags for each time period
monthyears <- unique(spread.dat$month_year[spread.dat$year != "2016"]) # Use all months in the data except for those in 2016 (category variables go up to 2015 only)

for (i in 1:length(monthyears)){
  
  dat <- subset(spread.dat, month_year == monthyears[i], select = c("iso3c", "cdsspread", "embispread", cat.var)) # Get data for each specific month in the dataset
  m <- table(dat[, c("iso3c", cat.var)]) # Create incidence table for initializing affiliation network (1)
  M <- as.matrix(m) # Create incidence table for initializing affiliation network (2)
  net <- graph.incidence(M) # Create network object from incidence table using igraph package
  bi.proj <- bipartite.projection(net) # Create bipartite projection of the affiliation network
  Wmat <- as.matrix(get.adjacency(bi.proj$proj1)) # Extract projection from affiliation network as matrix
  vec.embi <- as.data.frame(dat[, c("embispread")]) # Create vector of EMBI spreads
  vec.embi[is.na(vec.embi)] <- 0 # Fill NAs with zeros, otherwise matrix multiplication won't work
  vec.cds <- as.data.frame(dat[, c("cdsspread")]) # Create vector of CDS spreads
  vec.cds[is.na(vec.cds)] <- 0 # Fill NAs with zeros, otherwise matrix multiplication won't work
  diff <- data.frame(embi_diff = Wmat %*% vec.embi[,1], # Create spatial lag for EMBI spreads
                     cds_diff = Wmat %*% vec.cds[,1], # Create spatial lag for CDS spreads
                     iso3c = dat$iso3c,
                     month_year = monthyears[i],
                     stringsAsFactors = FALSE)
  diff$embi_diff[diff$embi_diff == 0] <- NA # Put back NAs
  diff$cds_diff[diff$cds_diff == 0] <- NA # Put back NAs
  diff.list[[i]] <- diff # Add diffusion variables to list of data frames to be later combined
  
}

ftse.diff <- do.call("rbind", diff.list)
colnames(ftse.diff) <- c("embispreadFTSEdiff", "cdsspreadFTSEdiff", "iso3c", "month_year")
spread.dat <- left_join(spread.dat, ftse.diff,
                        by = c("iso3c", "month_year"))

rm(dat, diff, bi.proj, net, diff.list, ftse.diff, vec.cds, vec.embi, Wmat)

# Diffusion variable based on MSCI categories for EMBIG and CDS spreads

cat.var <- "mscibarracategorynumeric" # Set category variable to be used
diff.list <- list() # List to store data frames with spatial lags for each time period
monthyears <- unique(spread.dat$month_year[spread.dat$year != "2016"]) # Use all months in the data except for those in 2016 (category variables go up to 2015 only)

for (i in 1:length(monthyears)){
  
  dat <- subset(spread.dat, month_year == monthyears[i], select = c("iso3c", "cdsspread", "embispread", cat.var)) # Get data for each specific month in the dataset
  m <- table(dat[, c("iso3c", cat.var)]) # Create incidence table for initializing affiliation network (1)
  M <- as.matrix(m) # Create incidence table for initializing affiliation network (2)
  net <- graph.incidence(M) # Create network object from incidence table using igraph package
  bi.proj <- bipartite.projection(net) # Create bipartite projection of the affiliation network
  Wmat <- as.matrix(get.adjacency(bi.proj$proj1)) # Extract projection from affiliation network as matrix
  vec.embi <- as.data.frame(dat[, c("embispread")]) # Create vector of EMBI spreads
  vec.embi[is.na(vec.embi)] <- 0 # Fill NAs with zeros, otherwise matrix multiplication won't work
  vec.cds <- as.data.frame(dat[, c("cdsspread")]) # Create vector of CDS spreads
  vec.cds[is.na(vec.cds)] <- 0 # Fill NAs with zeros, otherwise matrix multiplication won't work
  diff <- data.frame(embi_diff = Wmat %*% vec.embi[,1], # Create spatial lag (diffusion variable) for EMBI spreads
                     cds_diff = Wmat %*% vec.cds[,1], # Create spatial lag (diffusion variable) for CDS spreads
                     iso3c = dat$iso3c,
                     month_year = monthyears[i],
                     stringsAsFactors = FALSE)
  diff$embi_diff[diff$embi_diff == 0] <- NA # Put back NAs
  diff$cds_diff[diff$cds_diff == 0] <- NA # Put back NAs
  diff.list[[i]] <- diff # Add diffusion variables to list of data frames to be later combined
  
}

msci.diff <- do.call("rbind", diff.list)
colnames(msci.diff) <- c("embispreadMSCIdiff", "cdsspreadMSCIdiff", "iso3c", "month_year")
spread.dat <- left_join(spread.dat, msci.diff,
                        by = c("iso3c", "month_year"))

rm(dat, diff, bi.proj, net, diff.list, msci.diff, vec.cds, vec.embi, Wmat)

# Diffusion variable based on REGIONS for EMBIG and CDS spreads 

qog <- read_dta(file = file.path(DATADIR, "QoG_Std_TS_Jan16.dta"), encoding = "latin1")

colnames(qog)[4] <- "iso3c"
qog2015 <- subset(qog, year == 2015)
spread.dat <- plyr::join(as.data.frame(spread.dat), qog2015[c("iso3c", "ht_region")],
                         by = c("iso3c"), type = "left", match = "first")

cat.var <- "ht_region" # Set category variable to be used
diff.list <- list() # List to store data frames with spatial lags for each time period
monthyears <- unique(spread.dat$month_year)

for (i in 1:length(monthyears)){
  
  dat <- subset(spread.dat, month_year == monthyears[i], select = c("iso3c", "cdsspread", "embispread", cat.var)) # Get data for each specific month in the dataset
  m <- table(dat[, c("iso3c", cat.var)]) # Create incidence table for initializing affiliation network (1)
  M <- as.matrix(m) # Create incidence table for initializing affiliation network (2)
  net <- graph.incidence(M) # Create network object from incidence table using igraph package
  bi.proj <- bipartite.projection(net) # Create bipartite projection of the affiliation network
  Wmat <- as.matrix(get.adjacency(bi.proj$proj1)) # Extract projection from affiliation network as matrix
  vec.embi <- as.data.frame(dat[, c("embispread")]) # Create vector of EMBI spreads
  vec.embi[is.na(vec.embi)] <- 0 # Fill NAs with zeros, otherwise matrix multiplication won't work
  vec.cds <- as.data.frame(dat[, c("cdsspread")]) # Create vector of CDS spreads
  vec.cds[is.na(vec.cds)] <- 0 # Fill NAs with zeros, otherwise matrix multiplication won't work
  diff <- data.frame(embi_diff = Wmat %*% vec.embi[,1], # Create spatial lag (diffusion variable) for EMBI spreads
                     cds_diff = Wmat %*% vec.cds[,1], # Create spatial lag (diffusion variable) for CDS spreads
                     iso3c = dat$iso3c,
                     month_year = monthyears[i],
                     stringsAsFactors = FALSE)
  diff$embi_diff[diff$embi_diff == 0] <- NA # Put back NAs
  diff$cds_diff[diff$cds_diff == 0] <- NA # Put back NAs
  diff.list[[i]] <- diff # Add diffusion variables to list of data frames to be later combined
  
}

region.diff <- do.call("rbind", diff.list)
colnames(region.diff) <- c("embispreadREGIONdiff", "cdsspreadREGIONdiff", "iso3c", "month_year")
spread.dat <- left_join(spread.dat, region.diff,
                        by = c("iso3c", "month_year"))

rm(qog, qog2015, categor, dat, diff, bi.proj, net, diff.list, region.diff, vec.cds, vec.embi, Wmat)


# Add DPI variables converting to monthly frequency -----------------------

DPI <- import(file = file.path(DATADIR, "DPI2015.dta"), format = "dta")

elections <- read.csv(file = file.path(DATADIR, "Elections and Partisanship Data for BCM_BR.csv"),
                      na.strings = c("", " "),
                      stringsAsFactors = FALSE)

# Add info on chief executive from REIGN

reign <- read.csv(file = file.path(DATADIR, "REIGN_2017_01.csv"), stringsAsFactors = FALSE)

reign$ifs <- countrycode(reign$ccode, origin = "cown", destination = "wb", warn = TRUE)

reign$month_year <- paste(reign$month, "-", reign$year, sep = "")
reign$month_year <- as.Date(as.yearmon(reign$month_year, "%m-%Y"))
reign$month_year <- format(reign$month_year, "%m-%Y")

# Leadercode

reign2 <- reign %>%
  group_by(ccode, year) %>%
  dplyr::filter(month == min(month)) %>%
  dplyr::summarise(leadercode = max(leadercode)) %>%
  as.data.frame()

reign2$ifs <- countrycode(reign2$ccode, origin = "cown", destination = "wb", warn = TRUE)

DPI <- plyr::join(DPI, reign2[c("ifs", "year", "leadercode")],
                  by = c("ifs", "year"), type = "left", match = "first")

# "months since last election lost"
# (Loss variable is logged in REIGN)

reign$loss_log <- reign$loss
reign$loss <- exp(reign$loss)

spread.dat <- plyr::join(spread.dat, reign[, c("ifs", "month_year", "loss", "loss_log")],
                         by = c("ifs", "month_year"),
                         type = "left", match = "first")

rm(reign, reign2)

# Code partisan switch variables from DPI

DPI$execrlc2 <- DPI$execrlc
DPI$execrlc2[DPI$execrlc2 == -999 | DPI$execrlc2 == 0] <- NA

# Change in executive ideology
# Switch in any direction

DPI <- DPI %>%
  group_by(ifs) %>%
  mutate(execrlc_chng = c(NA, diff(execrlc2))) %>%
  as.data.frame()

DPI$ideol_switch <- ifelse(DPI$execrlc_chng != 0, 1, 0)

DPI <- DPI %>%
  group_by(ifs, leadercode) %>%
  mutate(ideol_switch = max(ideol_switch, na.rm = TRUE)) %>%
  as.data.frame()

DPI$ideol_switch[is.infinite(DPI$ideol_switch)] <- NA

# Switch to the left

DPI$ideol_switch_left <- ifelse(DPI$ideol_switch == 1 & DPI$execrlc2 == 3, 1, 0)

# Change in governing party

DPI$execme[DPI$execme == -999] <- NA

same_party <- function(x){ #Daily return function
  c(NA, ifelse(x[2:length(x)] != x[1:length(x)-1], 1, 0))
}

DPI <- DPI %>%
  group_by(ifs) %>%
  mutate(party_switch = same_party(execme)) %>%
  as.data.frame()

DPI <- DPI %>%
  group_by(ifs, leadercode) %>%
  mutate(party_switch = max(party_switch, na.rm = TRUE)) %>%
  as.data.frame()

DPI$party_switch[is.infinite(DPI$party_switch)] <- NA

DPI$newexec_year <- as.numeric(DPI$year)

# Create single unified variable of election occurrence (both 1st and 2nd rounds included when it's the case)
# Executive elections

elections$execelec1 <- as.numeric(elections$execelec1)
elections$execelec2 <- as.numeric(elections$execelec2)
elections <- mutate(elections, execelec = execelec1 + execelec2)
table(elections$execelec)
elections$execelec[elections$execelec == 2] <- 1

elections$execelec1date <- as.Date(elections$execelec1date, "%m/%d/%y")
elections$execelec2date <- as.Date(elections$execelec2date, "%m/%d/%y")
elections$execelecdate <- elections$execelec2date
elections$execelecdate[is.na(elections$execelecdate)] <- elections$execelec1date[is.na(elections$execelecdate)]

# Legislative elections

elections$legelec1 <- as.numeric(elections$legelec1)
elections$legelec2 <- as.numeric(elections$legelec2)
elections <- mutate(elections, legelec = legelec1 + legelec2)
table(elections$execelec)

elections$legelec1date <- as.Date(elections$legelec1date, "%m/%d/%y")
elections$legelec2date <- as.Date(elections$legelec2date, "%m/%d/%y")
elections$legelecdate <- elections$legelec1date
elections$execelecdate[is.na(elections$execelecdate)] <- elections$execelec2date[is.na(elections$execelecdate)]

clean_elec <- subset(elections, execelec1==1 | execelec2==1 | legelec1==1 | legelec2==1)

clean_elec$execelec[clean_elec$country == "Argentina" & clean_elec$year == 2015 & clean_elec$month == 10] <- 0
clean_elec$execelecdate[clean_elec$country == "Argentina" & clean_elec$year == 2015 & clean_elec$month == 10] <- NA

clean_elec$execelec[clean_elec$country == "Bulgaria" & clean_elec$year == 1996 & clean_elec$month == 10] <- 0
clean_elec$execelecdate[clean_elec$country == "Bulgaria" & clean_elec$year == 1996 & clean_elec$month == 10] <- NA

clean_elec$execelec[clean_elec$country == "Chile" & clean_elec$year == 1999] <- 0
clean_elec$execelecdate[clean_elec$country == "Chile" & clean_elec$year == 1999] <- NA
clean_elec$execelec[clean_elec$country == "Chile" & clean_elec$year == 2005] <- 0
clean_elec$execelecdate[clean_elec$country == "Chile" & clean_elec$year == 2005] <- NA
clean_elec$execelec[clean_elec$country == "Chile" & clean_elec$year == 2009] <- 0
clean_elec$execelecdate[clean_elec$country == "Chile" & clean_elec$year == 2009] <- NA
clean_elec$execelec[clean_elec$country == "Chile" & clean_elec$year == 2013 & clean_elec$month == 11] <- 0
clean_elec$execelecdate[clean_elec$country == "Chile" & clean_elec$year == 2013 & clean_elec$month == 11] <- NA

clean_elec$execelec[clean_elec$country == "Colombia" & clean_elec$year == 1998 & clean_elec$month == 5] <- 0
clean_elec$execelecdate[clean_elec$country == "Colombia" & clean_elec$year == 1998 & clean_elec$month == 5] <- NA
clean_elec$execelec[clean_elec$country == "Colombia" & clean_elec$year == 2010 & clean_elec$month == 5] <- 0
clean_elec$execelecdate[clean_elec$country == "Colombia" & clean_elec$year == 2010 & clean_elec$month == 5] <- NA
clean_elec$execelec[clean_elec$country == "Colombia" & clean_elec$year == 2014 & clean_elec$month == 5] <- 0
clean_elec$execelecdate[clean_elec$country == "Colombia" & clean_elec$year == 2014 & clean_elec$month == 5] <- NA

clean_elec$execelec[clean_elec$country == "Costa Rica" & clean_elec$year == 2014 & clean_elec$month == 2] <- 0
clean_elec$execelecdate[clean_elec$country == "Costa Rica" & clean_elec$year == 2014 & clean_elec$month == 2] <- NA

clean_elec$legelec[clean_elec$country == "Cote d'Ivoire" & clean_elec$year == 2000 & clean_elec$month == 12] <- 0
clean_elec$legelecdate[clean_elec$country == "Cote d'Ivoire" & clean_elec$year == 2000 & clean_elec$month == 12] <- NA
clean_elec$execelec[clean_elec$country == "Cote d'Ivoire" & clean_elec$year == 2010 & clean_elec$month == 10] <- 0
clean_elec$execelecdate[clean_elec$country == "Cote d'Ivoire" & clean_elec$year == 2010 & clean_elec$month == 10] <- NA

clean_elec$execelec[clean_elec$country == "Croatia" & clean_elec$year == 2000 & clean_elec$month == 1] <- 0
clean_elec$execelecdate[clean_elec$country == "Croatia" & clean_elec$year == 2000 & clean_elec$month == 1] <- NA
clean_elec$execelec[clean_elec$country == "Croatia" & clean_elec$year == 2009 & clean_elec$month == 12] <- 0
clean_elec$execelecdate[clean_elec$country == "Croatia" & clean_elec$year == 2009 & clean_elec$month == 12] <- NA
clean_elec$execelec[clean_elec$country == "Croatia" & clean_elec$year == 2014 & clean_elec$month == 12] <- 0
clean_elec$execelecdate[clean_elec$country == "Croatia" & clean_elec$year == 2014 & clean_elec$month == 12] <- NA

clean_elec$execelec[clean_elec$country == "Ecuador" & clean_elec$year == 1996 & clean_elec$month == 5] <- 0
clean_elec$execelecdate[clean_elec$country == "Ecuador" & clean_elec$year == 1996 & clean_elec$month == 5] <- NA
clean_elec$execelec[clean_elec$country == "Ecuador" & clean_elec$year == 1998 & clean_elec$month == 5] <- 0
clean_elec$execelecdate[clean_elec$country == "Ecuador" & clean_elec$year == 1998 & clean_elec$month == 5] <- NA
clean_elec$execelec[clean_elec$country == "Ecuador" & clean_elec$year == 2002 & clean_elec$month == 10] <- 0
clean_elec$execelecdate[clean_elec$country == "Ecuador" & clean_elec$year == 2002 & clean_elec$month == 10] <- NA
clean_elec$execelec[clean_elec$country == "Ecuador" & clean_elec$year == 2006 & clean_elec$month == 10] <- 0
clean_elec$execelecdate[clean_elec$country == "Ecuador" & clean_elec$year == 2006 & clean_elec$month == 10] <- NA

clean_elec$legelec[clean_elec$country == "Egypt" & clean_elec$year == 2005 & clean_elec$month == 11] <- 0
clean_elec$legelecdate[clean_elec$country == "Egypt" & clean_elec$year == 2005 & clean_elec$month == 11] <- NA
clean_elec$legelec[clean_elec$country == "Egypt" & clean_elec$year == 2010 & clean_elec$month == 11] <- 0
clean_elec$legelecdate[clean_elec$country == "Egypt" & clean_elec$year == 2010 & clean_elec$month == 11] <- NA
clean_elec$legelec[clean_elec$country == "Egypt" & clean_elec$year == 2011 & clean_elec$month == 11] <- 0
clean_elec$legelecdate[clean_elec$country == "Egypt" & clean_elec$year == 2011 & clean_elec$month == 11] <- NA
clean_elec$execelec[clean_elec$country == "Egypt" & clean_elec$year == 2012 & clean_elec$month == 5] <- 0
clean_elec$execelecdate[clean_elec$country == "Egyupt" & clean_elec$year == 2012 & clean_elec$month == 5] <- NA
clean_elec$legelec[clean_elec$country == "Egypt" & clean_elec$year == 2015 & clean_elec$month == 10] <- 0
clean_elec$legelecdate[clean_elec$country == "Egypt" & clean_elec$year == 2015 & clean_elec$month == 10] <- NA

clean_elec$execelec[clean_elec$country == "El Salvador" & clean_elec$year == 2014 & clean_elec$month == 2] <- 0
clean_elec$execelecdate[clean_elec$country == "El Salvador" & clean_elec$year == 2014 & clean_elec$month == 2] <- NA

clean_elec$execelec[clean_elec$country == "Guatemala" & clean_elec$year == 2011 & clean_elec$month == 9] <- 0
clean_elec$execelecdate[clean_elec$country == "Guatemala" & clean_elec$year == 2011 & clean_elec$month == 9] <- NA
clean_elec$execelec[clean_elec$country == "Guatemala" & clean_elec$year == 2015 & clean_elec$month == 9] <- 0
clean_elec$execelecdate[clean_elec$country == "Guatemala" & clean_elec$year == 2015 & clean_elec$month == 9] <- NA

clean_elec$execelec[clean_elec$country == "Peru" & clean_elec$year == 2000 & clean_elec$month == 4] <- 0
clean_elec$execelecdate[clean_elec$country == "Peru" & clean_elec$year == 2000 & clean_elec$month == 4] <- NA
clean_elec$execelec[clean_elec$country == "Peru" & clean_elec$year == 2001 & clean_elec$month == 4] <- 0
clean_elec$execelecdate[clean_elec$country == "Peru" & clean_elec$year == 2001 & clean_elec$month == 4] <- NA
clean_elec$execelec[clean_elec$country == "Peru" & clean_elec$year == 2006 & clean_elec$month == 4] <- 0
clean_elec$execelecdate[clean_elec$country == "Peru" & clean_elec$year == 2006 & clean_elec$month == 4] <- NA
clean_elec$execelec[clean_elec$country == "Peru" & clean_elec$year == 2011 & clean_elec$month == 4] <- 0
clean_elec$execelecdate[clean_elec$country == "Peru" & clean_elec$year == 2011 & clean_elec$month == 4] <- NA

clean_elec$execelec[clean_elec$country == "Poland" & clean_elec$year == 2010 & clean_elec$month == 6] <- 0
clean_elec$execelecdate[clean_elec$country == "Poland" & clean_elec$year == 2010 & clean_elec$month == 6] <- NA

clean_elec$execelec[clean_elec$country == "Romania" & clean_elec$year == 2004 & clean_elec$month == 11] <- 0
clean_elec$execelecdate[clean_elec$country == "Romania" & clean_elec$year == 2004 & clean_elec$month == 11] <- NA
clean_elec$execelec[clean_elec$country == "Romania" & clean_elec$year == 2009 & clean_elec$month == 11] <- 0
clean_elec$execelecdate[clean_elec$country == "Romania" & clean_elec$year == 2009 & clean_elec$month == 11] <- NA

clean_elec$execelec[clean_elec$country == "Senegal" & clean_elec$year == 2012 & clean_elec$month == 2] <- 0
clean_elec$execelecdate[clean_elec$country == "Senegal" & clean_elec$year == 2012 & clean_elec$month == 2] <- NA

clean_elec$execelec[clean_elec$country == "Serbia" & clean_elec$year == 2008 & clean_elec$month == 1] <- 0
clean_elec$execelecdate[clean_elec$country == "Serbia" & clean_elec$year == 2008 & clean_elec$month == 1] <- NA

clean_elec$execelec[clean_elec$country == "Slovakia" & clean_elec$year == 2009 & clean_elec$month == 3] <- 0
clean_elec$execelecdate[clean_elec$country == "Slovakia" & clean_elec$year == 2009 & clean_elec$month == 3] <- NA

clean_elec$execelec[clean_elec$country == "Tunisia" & clean_elec$year == 2014 & clean_elec$month == 11] <- 0
clean_elec$execelecdate[clean_elec$country == "Tunisia" & clean_elec$year == 2014 & clean_elec$month == 11] <- NA

clean_elec$execelec[clean_elec$country == "Ukraine" & clean_elec$year == 2004 & clean_elec$month == 10] <- 0
clean_elec$execelecdate[clean_elec$country == "Ukraine" & clean_elec$year == 2004 & clean_elec$month == 10] <- NA
clean_elec$execelec[clean_elec$country == "Ukraine" & clean_elec$year == 2004 & clean_elec$month == 11] <- 0
clean_elec$execelecdate[clean_elec$country == "Ukraine" & clean_elec$year == 2004 & clean_elec$month == 11] <- NA
clean_elec$execelec[clean_elec$country == "Ukraine" & clean_elec$year == 2010 & clean_elec$month == 1] <- 0
clean_elec$execelecdate[clean_elec$country == "Ukraine" & clean_elec$year == 2010 & clean_elec$month == 1] <- NA

clean_elec$execelec[clean_elec$country == "Uruguay" & clean_elec$year == 2009 & clean_elec$month == 10] <- 0
clean_elec$execelecdate[clean_elec$country == "Uruguay" & clean_elec$year == 2009 & clean_elec$month == 10] <- NA
clean_elec$execelec[clean_elec$country == "Uruguay" & clean_elec$year == 2014 & clean_elec$month == 10] <- 0
clean_elec$execelecdate[clean_elec$country == "Uruguay" & clean_elec$year == 2014 & clean_elec$month == 10] <- NA

clean_elec$newexec_month <- ifelse(as.numeric(format(clean_elec$execelecdate, "%d")) < 15,
                                    as.numeric(format(clean_elec$execelecdate, "%m")),
                                    as.numeric(format(clean_elec$execelecdate, "%m")) + 1)

clean_elec$newleg_month <- ifelse(as.numeric(format(clean_elec$legelecdate, "%d")) < 15,
                                   as.numeric(format(clean_elec$legelecdate, "%m")),
                                   as.numeric(format(clean_elec$legelecdate, "%m")) + 1)

# Merge executive variables from DPI into spread.dat data frame,
# converting yearly observations into monthly, with monthly changes
# recorded in the month when new election results are revealed

# execrlc: executive ideology
# execage: Party age
# prtyin: How long has party of chief executive been in office?
# tensys: How long has the country been autocratic/democratic?

clean_elec$year <- as.character(clean_elec$year)

spread.dat <- plyr::join(spread.dat, subset(clean_elec, execelec == 1)[c("iso3c", "year", "newexec_month")],
                         by = c("iso3c", "year"), type = "left", match = "first")

spread.dat$month_num <- as.numeric(spread.dat$month)
spread.dat$year_num <- as.numeric(spread.dat$year)
spread.dat$newexec_year <- ifelse(spread.dat$month_num < spread.dat$newexec_month,
                                  spread.dat$year_num,
                                  spread.dat$year_num + 1)
spread.dat$newexec_year[is.na(spread.dat$newexec_year)] <- spread.dat$year_num[is.na(spread.dat$newexec_year)]
DPI$newexec_year <- as.numeric(DPI$year)

spread.dat <- plyr::join(spread.dat, DPI[, c("ifs", "newexec_year",
                                             "percent1", "percentl", "execrlc",
                                             "prtyin", "execage", "tensys",
                                             "ideol_switch", "ideol_switch_left",
                                             "party_switch")],
                         by = c("ifs", "newexec_year"),
                         type = "left", match = "first")

# Merge legislative variables from DPI with spread.dat data frame,
# converting yearly observations into monthly, with monthly changes
# recorded in the month when new election results are revealed

spread.dat <- plyr::join(spread.dat, subset(clean_elec, legelec == 1)[c("iso3c", "year", "newleg_month")],
                         by = c("iso3c", "year"), type = "left", match = "first")

spread.dat$newleg_year <- ifelse(spread.dat$month_num < spread.dat$newleg_month,
                                  spread.dat$year_num,
                                  spread.dat$year_num + 1)
spread.dat$newleg_year[is.na(spread.dat$newleg_year)] <- spread.dat$year_num[is.na(spread.dat$newleg_year)]

DPI$newleg_year <- as.numeric(DPI$year)

spread.dat <- plyr::join(spread.dat, DPI[, c("ifs", "newleg_year", "herfgov", "govfrac", "numvote", "gov1rlc", "gov1nat",
                                             "gov1rurl", "gov1age", "gov2rlc", "gov3rlc", "herfopp", "oppfrac", "oppvote",
                                             "opp1rlc", "opp1nat", "opp1rurl", "opp1age", "herftot", "frac", "oppmajh",
                                             "maj", "partyage")],
                         by = c("ifs", "newleg_year"), type = "left", match = "first")

# Merge executive ideology from Baker & Greene 2011 (Latin America)
# into spread.dat data frame, converting yearly observations into monthly,
# with monthly changes recorded in the month when new election results are revealed

baker <- read.csv(file = file.path(DATADIR, "Baker Greene 2011 - Latin America Party Ideology Scores (Padded).csv"),
                  stringsAsFactors = FALSE)

baker$newexec_year <- as.numeric(baker$year)
baker$iso3c <- countrycode(baker$country, origin = "country.name", destination = "iso3c", warn = TRUE)
baker$baker_ideol <- baker$ideology_score * -1  # invert scale so that higher values are more left-wing

baker_region <- baker %>%
  group_by(newexec_year) %>%
  dplyr::summarise(baker_ideol_region = median(baker_ideol, na.rm = TRUE)) %>%
  as.data.frame()

baker <- baker %>%
  group_by(iso3c) %>%
  mutate(baker_ideol_chng = c(NA, diff(baker_ideol))) %>%
  as.data.frame()

baker <- plyr::join(baker, baker_region[, c("newexec_year", "baker_ideol_region")],
                    by = c("newexec_year"), type = "left", match = "first")

spread.dat <- plyr::join(spread.dat, baker[, c("iso3c", "newexec_year", "baker_ideol", "baker_ideol_region")],
                         by = c("iso3c", "newexec_year"), type = "left", match = "first")
rm(baker, baker_region)

# Code months in office

DPIext <- import(file = file.path(DATADIR, "DPI2015 (Stata13) - Extended.dta"), format = "dta") # Load DPI version that was extended to monthly frequency (in Stata)

DPIext$month_year <- paste(DPIext$month, DPIext$year, sep = "-")
DPIext$date <- as.Date(as.yearmon(DPIext$month_year, "%m-%Y"))
DPIext$month_year <- format(DPIext$date, "%m-%Y")

# Turnover coding from our own manual coding

clean_elec$turnoverBCM <- 0
clean_elec$turnoverBCM[clean_elec$exec_turnover == 0 | clean_elec$exec_turnover == 1 |
                         clean_elec$exec_turnover == 2 | clean_elec$exec_turnover == 3] <- 1 # Code as leader turnover cases coded as 0, 1, 2 or 3 in V-Dem (check codebook)

DPIext$year <- as.character(DPIext$year)

DPIext <- plyr::join(DPIext, clean_elec[c("ifs", "year", "month", "turnoverBCM")],
                     by = c("ifs", "month", "year"), type = "left", match = "first")

DPIext$turnoverBCM[is.na(DPIext$turnoverBCM)] <- 0

# Turnover coding from V-Dem

vdem <- read.csv(file = file.path(DATADIR, "V-Dem-CD-v8.csv"), stringsAsFactors = FALSE)

vdem$historical_date <- as.Date(vdem$historical_date, "%Y-%m-%d")
vdem$month_year <- format(vdem$historical_date, "%m-%Y")
vdem$ifs <- countrycode(vdem$country_text_id, origin = "iso3c", destination = "wb", warn = TRUE)
vdem$iso3c <- countrycode(vdem$country_text_id, origin = "iso3c", destination = "iso3c", warn = TRUE)

vdem2 <- subset(vdem, select = c(country_name, country_text_id, historical_date, month_year, iso3c, ifs,
                                v2eltvrexn_0, v2eltvrexn_1, v2eltvrexn_2, v2eltvrexn_3, v2eltvrexn_4,
                                v2elvotlrg, v2elvotsml,  # Vote share of winner and runner-up in 1st round of Presidential election
                                v2ellovtlg, v2ellovtsm)) # Vote share of largest and second largest party in Parliamentary election
vdem2 <- subset(vdem2, !is.na(v2eltvrexn_0) | !is.na(v2eltvrexn_1) | !is.na(v2eltvrexn_2) | !is.na(v2eltvrexn_3) | !is.na(v2eltvrexn_4))

vdem2$turnoverVDEM <- 0
vdem2$turnoverVDEM[vdem2$v2eltvrexn_0 == 1 | vdem2$v2eltvrexn_1 == 1 |
                    vdem2$v2eltvrexn_2 == 1 | vdem2$v2eltvrexn_3 == 1] <- 1 # Code as leader turnover cases coded as 0, 1, 2 or 3 in V-Dem (check codebook)

DPIext <- plyr::join(DPIext, vdem2[, c("ifs", "month_year", "turnoverVDEM")],
                     by = c("ifs", "month_year"), type = "left", match = "first")

DPIext$turnoverVDEM[is.na(DPIext$turnoverVDEM)] <- 0

# Create turnover variable in DPIext from combination of above turnover variables

DPIext$turnover <- NA
DPIext$turnover <- ifelse(is.na(DPIext$turnoverBCM),
                          DPIext$turnoverBCM,
                          DPIext$turnoverVDEM)

# Now months in office counter since last election

DPIext <- DPIext %>%
  group_by(ifs, cumsum(turnover == 1)) %>% 
  dplyr::mutate(monthsoffc = row_number()) %>%
  as.data.frame()

# Adjust exceptional cases

DPIext$yrsoffc[DPIext$yrsoffc == -999] <- NA

# Angola
DPIext$monthsoffc[DPIext$country == "Angola"] <- DPIext$yrsoffc[DPIext$country == "Angola"]*12 - (12 - DPIext$month[DPIext$country == "Angola"])

# Merge into main dataset

spread.dat <- plyr::join(spread.dat, DPIext[c("ifs", "month_year", "monthsoffc")],
                         by = c("ifs", "month_year"), type = "left", match = "all")

unique(spread.dat$country[spread.dat$monthsoffc > 200 & as.numeric(spread.dat$year) > 1990])

rm(DPIext)

# Fix coding mistakes
# Turkey (duplicate entries in 2015)

spread.dat <- spread.dat[-which(spread.dat$country == "Turkey" &
                                  as.numeric(spread.dat$year) == 2015 & spread.dat$newleg_month == 6), ]

# Recode DPI variables

table(spread.dat$execrlc)
table(spread.dat$execage)
table(spread.dat$prtyin)
table(spread.dat$tensys)

# Left executive 1 (left = 1, center/right = 0)
spread.dat$execleft <- NA
spread.dat$execleft[spread.dat$execrlc == 3] <- 1
spread.dat$execleft[spread.dat$execrlc == 1 | spread.dat$execrlc == 2 | spread.dat$execrlc == 0] <- 0

# Left executive 2 (left/center = 1, right = 0)
spread.dat$execleft2 <- NA
spread.dat$execleft2[spread.dat$execrlc == 3 | spread.dat$execrlc == 2] <- 1
spread.dat$execleft2[spread.dat$execrlc == 1 | spread.dat$execrlc == 0] <- 0

# Right executive (right = 1, center/left = 0)
spread.dat$execright <- NA
spread.dat$execright[spread.dat$execrlc == 1] <- 1
spread.dat$execright[spread.dat$execrlc == 3 | spread.dat$execrlc == 2 | spread.dat$execrlc == 0] <- 0

# Party age
spread.dat$execage[spread.dat$execage == -999] <- NA

# How long has the country been autocratic/democratic?
spread.dat$tensys[spread.dat$tensys == -999] <- NA

# How long has party of chief executive been in office?
spread.dat$prtyin[spread.dat$prtyin == -999] <- NA

# Political regime from V-Dem

spread.dat <- plyr::join(spread.dat, vdem[, c("iso3c", "year", "v2x_libdem")],
                         by = c("iso3c", "year"), type = "left", match = "first")

# Political constraints (Henisz)

polcon <- read.csv(file.path(DATADIR, "POLCON 2017.csv"), stringsAsFactors = FALSE)
polcon$iso3c <- countrycode(polcon$polity_country, origin = "country.name", destination = "iso3c", warn = TRUE)

spread.dat <- plyr::join(spread.dat, polcon[, c("iso3c", "year", "polconiii", "polconv")],
                         by = c("iso3c", "year"), type = "left", match = "first")
rm(polcon)

# Central bank independence (Bodea & Hicks)

cbi <- import(file.path(DATADIR, "Bodea Hicks CBI 1972-2015.dta"), format = "dta")
cbi$iso3c <- countrycode(cbi$country, origin = "country.name", destination = "iso3c", warn = TRUE)

spread.dat <- plyr::join(spread.dat, cbi[, c("iso3c", "year", "lvau", "lvaw")],
                         by = c("iso3c", "year"), type = "left", match = "first")
rm(cbi)

# Lag dependent variables

spread.dat <- spread.dat %>%
  group_by(country) %>%
  mutate(cdsspread.l = c(NA, cdsspread[1:length(cdsspread) - 1]),
         embispread.l = c(NA, embispread[1:length(embispread) - 1])) %>%
  as.data.frame()

# Code election dummy variable (6 months before election, election month included)

elections$month_year <- format(as.Date(elections$spread_date, "%m/%d/%y"), "%m-%Y")

spread.dat <- plyr::join(spread.dat, elections[, c("iso3c", "month_year", "execelec", "legelec")],
                         by = c("iso3c", "month_year"), type = "left", match = "first")

spread.dat$execelec_window <- ifelse(spread.dat$execelec == 1 |
                                       dplyr::lead(spread.dat$execelec, n = 1) == 1 |
                                       dplyr::lead(spread.dat$execelec, n = 2) == 1 |
                                       dplyr::lead(spread.dat$execelec, n = 3) == 1 |
                                       dplyr::lead(spread.dat$execelec, n = 4) == 1 |
                                       dplyr::lead(spread.dat$execelec, n = 5) == 1, 1, 0)

spread.dat$legelec_window <- ifelse(spread.dat$legelec == 1 |
                                      dplyr::lead(spread.dat$legelec, n = 1) == 1 |
                                      dplyr::lead(spread.dat$legelec, n = 2) == 1 |
                                      dplyr::lead(spread.dat$legelec, n = 3) == 1 |
                                      dplyr::lead(spread.dat$legelec, n = 4) == 1 |
                                      dplyr::lead(spread.dat$legelec, n = 5) == 1, 1, 0)

spread.dat <- plyr::join(spread.dat, DPI[, c("ifs", "year", "system")],
                         by = c("ifs", "year"), type = "left", match = "first")

spread.dat$election_window <- ifelse(spread.dat$system == 0, spread.dat$execelec_window,
                                     ifelse(spread.dat$system == 1 | spread.dat$system == 2, spread.dat$legelec_window, 0))

spread.dat$election_window[is.na(spread.dat$election_window)] <- 0

# Close elections

elections <- plyr::join(elections, vdem2[,c("iso3c", "month_year", "v2elvotlrg", "v2elvotsml", "v2ellovtlg", "v2ellovtsm")],
                        by = c("iso3c", "month_year"),
                        type = "left", match = "first")

elections$president_margin <- elections$v2elvotlrg - elections$v2elvotsml

elections$president_close5 <- ifelse(elections$president_margin <= 5, 1, 0)   # Close = margin < 5
elections$president_close10 <- ifelse(elections$president_margin <= 10, 1, 0) # Close = margin < 10

elections$parliament_margin <- elections$v2ellovtlg - elections$v2ellovtsm

elections$parliament_close5 <- ifelse(elections$parliament_margin <= 5, 1, 0)   # Close = margin < 5
elections$parliament_close10 <- ifelse(elections$parliament_margin <= 10, 1, 0) # Close = margin < 10

spread.dat <- plyr::join(spread.dat, elections[, c("iso3c", "month_year",
                                                   "president_close5", "president_close10",
                                                   "parliament_close5", "parliament_close10")],
                         by = c("iso3c", "month_year"),
                         type = "left", match = "first")

# Close presidential election (6-month window)

spread.dat$pres_close5_window <- ifelse(spread.dat$president_close5 == 1 |
                                        dplyr::lead(spread.dat$president_close5, n = 1) == 1 |
                                        dplyr::lead(spread.dat$president_close5, n = 2) == 1 |
                                        dplyr::lead(spread.dat$president_close5, n = 3) == 1 |
                                        dplyr::lead(spread.dat$president_close5, n = 4) == 1 |
                                        dplyr::lead(spread.dat$president_close5, n = 5) == 1, 1, 0)

spread.dat$pres_close10_window <- ifelse(spread.dat$president_close10 == 1 |
                                          dplyr::lead(spread.dat$president_close10, n = 1) == 1 |
                                          dplyr::lead(spread.dat$president_close10, n = 2) == 1 |
                                          dplyr::lead(spread.dat$president_close10, n = 3) == 1 |
                                          dplyr::lead(spread.dat$president_close10, n = 4) == 1 |
                                          dplyr::lead(spread.dat$president_close10, n = 5) == 1, 1, 0)

spread.dat$pres_close5_window[is.na(spread.dat$pres_close5_window)] <- 0
spread.dat$pres_close10_window[is.na(spread.dat$pres_close10_window)] <- 0

# Close parliamentary election (6-month window)

spread.dat$par_close5_window <- ifelse(spread.dat$parliament_close5 == 1 |
                                          dplyr::lead(spread.dat$parliament_close5, n = 1) == 1 |
                                          dplyr::lead(spread.dat$parliament_close5, n = 2) == 1 |
                                          dplyr::lead(spread.dat$parliament_close5, n = 3) == 1 |
                                          dplyr::lead(spread.dat$parliament_close5, n = 4) == 1 |
                                          dplyr::lead(spread.dat$parliament_close5, n = 5) == 1, 1, 0)

spread.dat$par_close10_window <- ifelse(spread.dat$president_close10 == 1 |
                                           dplyr::lead(spread.dat$parliament_close10, n = 1) == 1 |
                                           dplyr::lead(spread.dat$parliament_close10, n = 2) == 1 |
                                           dplyr::lead(spread.dat$parliament_close10, n = 3) == 1 |
                                           dplyr::lead(spread.dat$parliament_close10, n = 4) == 1 |
                                           dplyr::lead(spread.dat$parliament_close10, n = 5) == 1, 1, 0)

spread.dat$par_close5_window[is.na(spread.dat$par_close5_window)] <- 0
spread.dat$par_close10_window[is.na(spread.dat$par_close10_window)] <- 0

# Merge close presidential and parliamentary elections into single variable

spread.dat$close_election5 <- spread.dat$pres_close5_window
spread.dat$close_election5[spread.dat$close_election5 == 0] <- spread.dat$par_close5_window[spread.dat$close_election5 == 0]

spread.dat$close_election10 <- spread.dat$pres_close10_window
spread.dat$close_election10[spread.dat$close_election10 == 0] <- spread.dat$par_close10_window[spread.dat$close_election10 == 0]

# Adjust partisanship and time in office variables
# Partisanship of the winning candidate (and time in office) is coded backward
# to 3 months before the election to capture possible market reactions to leading candidate

spread.dat <- spread.dat %>%
  group_by(country) %>%
  mutate(execleft_adj = dplyr::lead(execleft, n = 3),
         execleft2_adj = dplyr::lead(execleft2, n = 3),
         monthsoffc_adj = dplyr::lead(monthsoffc, n = 3)) %>%
  as.data.frame()

# Differenced spreads

spread.dat <- spread.dat %>%
  group_by(country) %>%
  mutate(d_cdsspread = c(NA, diff(cdsspread)),
         d_embispread = c(NA, diff(embispread)),
         d_embispreadREGIONdiff = c(NA, diff(embispreadREGIONdiff)),
         d_cdsspreadREGIONdiff = c(NA, diff(cdsspreadREGIONdiff)),
         d_energy_index = c(NA, diff(energy_index))) %>%
  as.data.frame()

# Differenced interpolated variables

spread.dat <- spread.dat %>%
  group_by(country) %>%
  mutate(d_growth_qtr = c(NA, diff(growth_qtr)),
         d_cab_qtr_gdp = c(NA, diff(cab_qtr_gdp)),
         d_extdebt_gdp_qtr = c(NA, diff(embispreadREGIONdiff)),
         d_cab_annual_gdp = c(NA, diff(cab_annual_gdp)),
         d_extdebt_gni_annual = c(NA, diff(extdebt_gni_annual)),
         d_shortdebt_reserves_annual = c(NA, diff(shortdebt_reserves_annual)),
         d_growth_annual = c(NA, diff(growth_annual))) %>%
  as.data.frame()

rm(vdem, vdem2, DPI, elections, clean_elec)

# Add populism data

popul <- read.csv(file.path(DATADIR, "Populist government_v2.1.csv"),
                  stringsAsFactors = FALSE)

popul$year <- as.character(popul$year)
popul$iso3c <- countrycode(popul$COWcode,
                           origin = "cown",
                           destination = "iso3c",
                           warn = TRUE)
popul$populist <- popul$patb3
popul <- popul[order(popul$iso3c, popul$year), ]

spread.dat <- left_join(spread.dat,
                        popul[c("year", "iso3c", "populist")],
                        by = c("year", "iso3c"),
                        na_matches = "never")

# Save data

save(spread.dat, file = file.path(DATADIR, "EMBIG and CDS Dataset.RData"))
write.csv(spread.dat, file = file.path(DATADIR, "EMBIG and CDS Dataset.csv"), row.names = FALSE, na = "")

